% Estimate (G)PME and alphas for bootstrapped individual PE funds
%
% Bootstrapped funds for a range of (true) betas, with an added "alpha" to
% the very first cash flow
%
% GPME uses the exponential-affine factor model:
%   M(t+1) = exp[f(t+1) * b]

% The constructed variables are defined as follows:
%   PEfund_fundid   = Nx1 vector with fund identifiers
%   PEfund_firmid   = Nx1 vector with firm identifiers
%   PEfund_seqnr    = Nx1 vector with fund sequence number for a given firm
%   PEfund_vintage  = Nx1 vector with fund vintage years
%   PEfund_size     = Nx1 vector with size of fund in $mln for each fund 1..N
%   PEfund_size1990 = Nx1 vector with size of fund in 1990 mlns U.S. dollars for each fund 1..N (using CPIAUCSL from FRED to correct for inflation)
%   PEfund_nextfundid   = Nx1 vector with fund id of next fund of same type by same GP (NaN if there is no next fund of same PE firm and type in the data)
%   PEfund_nextvintage  = Nx1 vector with next fund's vintage year of same type by same GP (NaN if there is no next fund of same PE firm and type in the data)
%   PEfund_nextsize     = Nx1 vector with size in $mln of next fund of same type by same GP (NaN if there is no next fund of same PE firm and type in the data)
%   PEfund_nextsize1990 = Nx1 vector with size of next fund of same type by same GP, in 1990 mlns U.S. (NaN if there is no next fund of same PE firm and type in the data)
%   PEfund_numCF    = Nx1 vector with number of observed net cash flow dates (calls, distributions, and the final NAV) for each fund 1..N
%   PEfund_type     = Nx1 vector with fund type (VC = 1, Buyout = 2, Expansion Capital ("Growth Equity") = 3)
%   PEfund_subtype  = Nx1 vector with type of VC (1 = balanced; 2 = early-stage; 3 = late-stage)
%   PEfund_CF       = NxJ matrix of cash flows, ending with the final observed NAV (if not zero). Padded with zeros at the end of each row.
%   PEfund_datenums = NxJ matrix of dates of cash flows, in Matlab datenum format. Padded with zeros at the end of each row.
%   PEfund_T        = NxJ with time since fund firstdate and the date of the corresponding entry in PEfund_CF (in years)
%   PEfund_finalNAV = Nx1 vector of last stated NAV (if = 0 then fund is fully liquidated)
%   PEfund_liq100   = Nx1 vector, =1 if fund is fully (100%) liquidated (final NAV = 0 and fund is not being raised)
%   PEfund_liq95    = Nx1 vector, =1 if final NAV < 5% of fund size (and fund is not being raised)
%   PEfund_liq90    = Nx1 vector, =1 if final NAV < 10% of fund size (and fund is not being raised)
%   PEfund_TVPI     = Nx1 vector with fund TVPIs
%   PEfund_PME      = Nx1 vector with fund PMEs (calculated the traditional Kaplan-Schoar way)
%   PEfund_IRR      = Nx1 vector with fund IRRs (calculated the Preqin way, i.e., capping cash flow times at 10 years from initial cashflow)
%   PEfund_Rf       = NxJ matrix of risk-free asset return between fund firstdate and date of corresponding entry in PEfund_CF (arithmetic, compounded 1-month risk-free rates)
%   PEfund_Rm       = market return between fund firstdate and date of corresponding entry in PEfund_CF (arithmetic)
%   PEfund_Rsmb     = SMB return between fund firstdate and date of corresponding entry in PEfund_CF (arithmetic)
%   PEfund_Rhml     = HML return between fund firstdate and date of corresponding entry in PEfund_CF (arithmetic)
%   PEfund_RS       = return on small-firms portfolio, computed as compounded return on (SL+SM+SH)/3
%   PEfund_RS2      = return on small-firms portfolio, computed as compounded return on (rf+rSMB)
%   PEfund_RSL      = Small-Low B/M return between fund firstdate and date of corresponding entry in PEfund_CF (arithmetic)
%   PEfund_RSM      = Small-Medium B/M return between fund firstdate and date of corresponding entry in PEfund_CF (arithmetic)
%   PEfund_RSH      = Small-High B/M return between fund firstdate and date of corresponding entry in PEfund_CF (arithmetic)
%   PEfund_RBL      = Big-Low B/M return between fund firstdate and date of corresponding entry in PEfund_CF (arithmetic)
%   PEfund_RBM      = Big-Medium B/M return between fund firstdate and date of corresponding entry in PEfund_CF (arithmetic)
%   PEfund_RBH      = Big-High B/M return between fund firstdate and date of corresponding entry in PEfund_CF (arithmetic)
%   PEfund_RmVar    = NxJ matrix of variance of market returns since fund inception, from daily data (used for computing individual fund alphas and betas)
%
% NB: all returns are arithmetic, gross returns, i.e. 1.05 means 5% return.
%
% A fund is considered VC if its "Asset_Class" = "Venture Capital"
% A fund is considered Buyout if its "Sub_Asset_Class" is "Buyout"

clear all
close all
clc

debug       = true;     % true = use fake data file; false = use real Burgiss data

maxdate     = datenum('2022/03/31');   % 2022Q1 data; delete any dates after quarter-end

maxfunds    = 3500;     % max # funds in data (used to initialize variables)
maxnumCF    = 500;      % max # cash flows per fund (used to initialize variables)

if debug
    indir   = '';
    inname  = 'FakeData.xlsx';
    outdir  = indir;
else
    indir   = 'D:\Projects\Arthur\Exchange\';
    inname  = 'TransData2022Q1.csv';
    outdir  = 'D:\Projects\Arthur\Exchange\Out\';
end

% initialize diary
if exist([outdir 'Burgiss_sim_log_' datestr(date,'yyyymmdd') '.txt'],'file')
    delete([outdir 'Burgiss_sim_log_' datestr(date,'yyyymmdd') '.txt']);    % delete diary file, if it exists
end
diary([outdir 'Burgiss_sim_log_' datestr(date,'yyyymmdd') '.txt']);
diary on            % this will save the screen output

% Set options for treatment of data
includeNAV  = 1;    % 0 = do not include final NAV; 1 = include final NAV

try                 % catch errors so the diary can be closed and saved


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Load Burgiss data 
%
% Contains the variables:
%     FundID, TransactionID, Inception_Date, Vintage, 
%     Asset_Class1, Asset_Class2, Asset_Class3, Geographic_Focus, 
%     Sub_Geographic_Focus, Currency, Type, Fund_Size, Date, Amount, FirmID
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
tic

% read the PE fund cash flow data 
% NB: MAKE SURE THIS FILE IS SORTED FIRST BY FUND ID, SECOND BY TRANSACTION DATE!
if debug
    % read the PE fund cash flow data text file
    inCF = readtable('FakeData.xlsx');
    inCF_fundid     = inCF.FundID;              % fund identifier
    inCF_vintage    = inCF.Vintage;             % vintage year
    inCF_class      = inCF.Class;               % VC, Buyout, ...
    inCF_subclass   = inCF.SubClass;            % for VC: early stage, late stage, generalist
    inCF_subfocus   = inCF.SubFocus;            % geography: to filter out non-US funds
    inCF_size       = inCF.FundSizeUSD;         % fund size
    inCF_transdate  = inCF.Date;                % calendar dates of contributions, distributions, valuations
    inCF_transtype  = inCF.TransactionType;     % type of transaction (contribution, distribution, valuation)
    inCF_transamt   = inCF.AmountUSD;           % amount of transaction in USD
    inCF_firmid     = inCF.FirmID;              % GP identifier
else
    [inCF_fundid, ~, inCF_vintage, ~, inCF_class, inCF_subclass, ~, ~, inCF_subfocus, ~, ~, ~, inCF_size, inCF_transdate, inCF_transtype, ~, inCF_transamt, inCF_firmid, ~] ...
        = textread([indir inname],'%n%s%n%s%s%s%s%s%s%s%s%n%n%s%s%f%f%n%s','headerlines',1,'delimiter',',','emptyvalue',NaN);
end

% read 5 factor return data
s = textread('F-F_Research_Data_5_Factors_2x3_daily.csv','','headerlines',4,'delimiter',',','emptyvalue',NaN);
FF_date = s(:,1);
FF_datenum = datenum(floor(s(:,1)/10000), floor(s(:,1)/100)-100*floor(s(:,1)/10000),rem(s(:,1),100));
FF_rmrf = s(:,2)/100;    % daily rm-rf
FF_rSMB = s(:,3)/100;    % daily return to SMB ptf
FF_rHML = s(:,4)/100;    % daily return to HML ptf
FF_rRMW = s(:,5)/100;    % daily return to RMW ptf
FF_rCMA = s(:,6)/100;    % daily return to CMA ptf
FF_rf = s(:,7)/100;      % daily rf
FF_rm = FF_rf + FF_rmrf;

% read size/BM portfolio return data
s = textread('6_Portfolios_2x3_daily.csv','','headerlines',19,'delimiter',',','emptyvalue',NaN);
FF6_datenum = datenum(floor(s(:,1)/10000), floor(s(:,1)/100)-100*floor(s(:,1)/10000),rem(s(:,1),100));
FF6_SL = s(:,2)/100;    % small-low b/m portfolio returns
FF6_SM = s(:,3)/100;    % small-medium b/m portfolio returns
FF6_SH = s(:,4)/100;    % small-high b/m portfolio returns
FF6_BL = s(:,5)/100;    % big-low b/m portfolio returns
FF6_BM = s(:,6)/100;    % big-medium b/m portfolio returns
FF6_BH = s(:,7)/100;    % big-high b/m portfolio returns

% read op/size portfolio return data
s = textread('6_Portfolios_ME_OP_2x3_daily.csv','','headerlines',23,'delimiter',',','emptyvalue',NaN);
FF6MEOP_datenum = datenum(floor(s(:,1)/10000), floor(s(:,1)/100)-100*floor(s(:,1)/10000),rem(s(:,1),100));
FF6_SLop = s(:,2)/100;    % small-low profit portfolio returns
FF6_SMop = s(:,3)/100;    % small-medium profit portfolio returns
FF6_SHop = s(:,4)/100;    % small-high profit portfolio returns
FF6_BLop = s(:,5)/100;    % big-low profit portfolio returns
FF6_BMop = s(:,6)/100;    % big-medium profit portfolio returns
FF6_BHop = s(:,7)/100;    % big-high profit portfolio returns

% read the inflation data
[CPI_date, CPI] = textread([indir 'CPIAUCSL.csv'],'%d%f','headerlines',1,'delimiter',',','emptyvalue',NaN);

% initialize data variables
PEfund_fundid   = zeros(maxfunds,1);
PEfund_firmid   = ones(maxfunds,1)*NaN;
PEfund_seqnr    = ones(maxfunds,1)*NaN;
PEfund_vintage  = zeros(maxfunds,1);
PEfund_size     = zeros(maxfunds,1);
PEfund_size1990 = zeros(maxfunds,1);
PEfund_type     = zeros(maxfunds,1);
PEfund_subtype  = zeros(maxfunds,1);
PEfund_finalNAV = zeros(maxfunds,1);
PEfund_nextfundid   = ones(maxfunds,1)*NaN;
PEfund_nextvintage  = ones(maxfunds,1)*NaN;
PEfund_nextsize     = ones(maxfunds,1)*NaN;
PEfund_nextsize1990 = ones(maxfunds,1)*NaN;
PEfund_numCF    = zeros(maxfunds,1);
PEfund_datenums = zeros(maxfunds,maxnumCF);
PEfund_T        = zeros(maxfunds,maxnumCF);
PEfund_CF       = zeros(maxfunds,maxnumCF);
PEfund_liq90    = zeros(maxfunds,1);
PEfund_liq95    = zeros(maxfunds,1);
PEfund_liq100   = zeros(maxfunds,1);
PEfund_TVPI     = zeros(maxfunds,1);
PEfund_PME      = zeros(maxfunds,1);
PEfund_IRR      = zeros(maxfunds,1);
PEfund_Rf       = zeros(maxfunds,maxnumCF);
PEfund_Rm       = zeros(maxfunds,maxnumCF);
PEfund_IIstrt   = NaN(maxfunds,maxnumCF);
PEfund_IIend    = NaN(maxfunds,maxnumCF);
PEfund_Rsmb     = zeros(maxfunds,maxnumCF);
PEfund_Rhml     = zeros(maxfunds,maxnumCF);
PEfund_Rrmw     = zeros(maxfunds,maxnumCF);
PEfund_Rcma     = zeros(maxfunds,maxnumCF);
PEfund_RS       = zeros(maxfunds,maxnumCF);
PEfund_RS2      = zeros(maxfunds,maxnumCF);
PEfund_RH       = zeros(maxfunds,maxnumCF);
PEfund_RH2      = zeros(maxfunds,maxnumCF);
PEfund_RL       = zeros(maxfunds,maxnumCF);
PEfund_Rmvol    = zeros(maxfunds,maxnumCF);
PEfund_Rmvar    = zeros(maxfunds,maxnumCF);
PEfund_RB       = zeros(maxfunds,maxnumCF);
PEfund_RSL      = zeros(maxfunds,maxnumCF);
PEfund_RSM      = zeros(maxfunds,maxnumCF);
PEfund_RSH      = zeros(maxfunds,maxnumCF);
PEfund_RBL      = zeros(maxfunds,maxnumCF);
PEfund_RBM      = zeros(maxfunds,maxnumCF);
PEfund_RBH      = zeros(maxfunds,maxnumCF);
PEfund_RSLop    = zeros(maxfunds,maxnumCF);
PEfund_RSMop    = zeros(maxfunds,maxnumCF);
PEfund_RSHop    = zeros(maxfunds,maxnumCF);
PEfund_RBLop    = zeros(maxfunds,maxnumCF);
PEfund_RBMop    = zeros(maxfunds,maxnumCF);
PEfund_RBHop    = zeros(maxfunds,maxnumCF);
PEfund_RLop     = zeros(maxfunds,maxnumCF);
PEfund_RHop     = zeros(maxfunds,maxnumCF);


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Go through the PE funds one by one
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
cnt = 0;    % keep total count of included funds
i   = 1;    % current row in the input file
while (i < length(inCF_fundid))   % keep processing until run out of data
    cur_fundid  = inCF_fundid(i);   % current fund id    
    
    % Determine current fund's starting and ending rows in input data
    strti = i;                % starting row in input data
    while (i <= length(inCF_fundid)) && (inCF_fundid(i) == cur_fundid)
        i = i + 1; 
    end
    endi = i - 1;            % ending row in input data
    
    % Check if fund is VC or Buyout
    isVC = false; if strcmpi(inCF_class(strti), 'Venture Capital'), isVC = true; end
    isBO = false; if strcmpi(inCF_class(strti), 'Buyout'), isBO = true; end
    isGE = false; if strcmpi(inCF_class(strti), 'Expansion Capital'), isGE = true; end
    
    % Determine fund size in 1990 millions of U.S. Dollars
    size1990    = (inCF_size(strti) / 1000000) ...
                    * CPI(CPI_date==1990) / CPI(CPI_date == inCF_vintage(strti));     
%     if ~strcmp(inCF_currency,'USD'), size1990 = NaN; end  % check if in USD
    
    
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Now process the fund, if it satisfies all of these conditions:
    % 1) VC or Buyout or Growth Equity fund (no Fund of Funds!)
    % 2) Vintage 2012 or earlier
    % 3) >= $5m in AUM in 1990 dollars
    % 4) U.S.-focused
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    if (isVC || isBO ||isGE) && (inCF_vintage(strti) <= 2016) && ...
        (size1990 > 5) && strcmp(inCF_subfocus(strti),'United States')
        
        disp(['Processing fund #' num2str(inCF_fundid(strti))]);
    
        % Convert all fund transaction dates into Matlab datenum format.
        % Resort all transactions first by date (earliest to latest), then by
        % transaction type, with valuations last (since any valuations that
        % coincide with distributions are post-distribution).
        transdatenum    = datenum(inCF_transdate(strti:endi));
        transtype       = zeros(endi-strti+1,1);
        % recode transtype to 1 (contribution), 2 (distribution), 3(valuation), for sorting.
        transtype(strcmp('Contribution', inCF_transtype(strti:endi))) = 1;
        transtype(strcmp('Distribution', inCF_transtype(strti:endi))) = 2;
        transtype(strcmp('Valuation', inCF_transtype(strti:endi))) = 3;
        transamt        = inCF_transamt(strti:endi);
        Z               = sortrows([transdatenum transtype transamt], [1 2]); % sort first by date, then by transaction type
        transdatenum    = Z(:,1);
        transtype       = Z(:,2);
        transamt        = Z(:,3);
        
        % Drop any transactions after the data cutoff date (these are incomplete)
        while (transdatenum(end) > maxdate)
            transdatenum(end)   = [];
            transtype(end)      = [];
            transamt(end)       = [];
        end        
               
        % Determine final observed NAV, which is the last observed "Valuation"
        % (transtype = 3) plus/minus any later contributions/distributions.
        % Adjust the date of the final NAV to be the date of the last cash flow.
        j = length(transtype); while (j > 0) && (transtype(j) ~= 3), j = j - 1; end
        if (j > 0)  % catch funds with not a single reported NAV (usually very young funds; not usable)
            finalNAV                = transamt(j);
            finalNAV_datenum        = transdatenum(j);
            j                       = j + 1;
            while (j <= length(transamt))    % process all cash flows after final reported NAV
                if (transamt(j) ~= 0)   % if zero contr/distr then don't update the final NAV date
                    finalNAV            = finalNAV - transamt(j);
                    finalNAV_datenum    = transdatenum(j);
                end
                j                   = j + 1;
            end
        
            % Check that the first transaction is a contribution
            while ~isempty(transtype)  && (transtype(1) ~= 1)  
                disp(['Fund #' num2str(inCF_fundid(strti)) ': ' inCF_transtype(1) ' before first Contribution']);
                transdatenum(1)     = [];   % delete any transactions preceding the first contribution
                transtype(1)        = [];
                transamt(1)         = [];
            end

            % Drop any trailing zeros (not a big deal, but helps to speed things up)
            while ~isempty(transamt) && (transamt(end) == 0)
                transdatenum(end)   = [];
                transtype(end)      = [];
                transamt(end)       = [];
            end
        else
            transdatenum = [];      % not a single NAV reported for this fund, will be skipped
        end
        
        % If there is valid transaction data for this fund, process it
        if ~isempty(transdatenum)    % skip any empty funds 
            
            cnt = cnt + 1;      % increase fund counter
        
            % Basic fund characteristics
            PEfund_fundid(cnt)      = inCF_fundid(strti);
            PEfund_firmid(cnt)      = inCF_firmid(strti);   % missing = NaN
            PEfund_vintage(cnt)     = inCF_vintage(strti);
            PEfund_size(cnt)        = inCF_size(strti) / 1000000;
            PEfund_size1990(cnt)    = size1990;       
            % determine the fund's asset class
            if isVC, PEfund_type(cnt,1) = 1; end
            if isBO, PEfund_type(cnt,1) = 2; end
            if isGE, PEfund_type(cnt,1) = 3; end
            % and subclass (for VC)
            PEfund_subtype(cnt)     = 0;    
            if strcmpi(inCF_subclass(strti), 'Generalist') , PEfund_subtype(cnt,1) = 1; end
            if strcmpi(inCF_subclass(strti), 'Early Stage'), PEfund_subtype(cnt,1) = 2; end
            if strcmpi(inCF_subclass(strti), 'Late Stage') , PEfund_subtype(cnt,1) = 3; end  

            
            % Add the final NAV as a separate distribution (if includeNAV==1)
            % This may result in two cash flows on the same date, but that's ok. 
            % NB: NAVs are ignored in the processing of cash flows below, so there is no double-counting
            PEfund_finalNAV(cnt)    = max(0, finalNAV); % take care of cases where final distributions make NAV negative
            if (includeNAV == 1) && (PEfund_finalNAV(cnt) > 0)
                transdatenum    = [transdatenum; finalNAV_datenum];
                transtype       = [transtype   ; 2];   % Distribution: transtype = 2
                transamt        = [transamt    ; PEfund_finalNAV(cnt)]; 
            end
            
        
            % Construct matrix with cash flows and a matrix with time-since-fund-inception
            distr               = zeros(1,length(transamt));   % separate vector for distributions
            contr               = zeros(1,length(transamt));   % separate vector for contributions
            cntCF               = 0;
            for j = 1:length(transamt)
                % Enter the cash flows into the matrix. Notes:
                % 1) Calls are sometimes positive: 
                %       Can occur if the fund manager calls up capital for a deal, but for whatever reason the deal falls through or a smaller amount of capital is required. The GP refunds some capital to the LPs, which can be called later.
                %       Just treat as a negative call for the purpose of computing TVPI, IRR, and (G)PME.
                % 2) Distributions are sometimes negative: 
                %       Reflects the occurrence of recallable distributions. Fund managers often have the option of recalling a certain proportion of the distributions made to investors for further investments, effectively recycling commitments.
                %       For TVPI, per GIPS standards, recallable distributions should be considered included in the numerator of this ratio. Any reinvested capital (resulting from recallable distributions) should be included in the contributions.
                %       As a result, cumulative calls can exceed fund size
                % NB: These issues only matters for TVPI and traditional PME, where there is a distinction between calls and distributions. 
                %     GPME and IRR only care about net cash flows, and whether a cash flow is considered a positive or negative call or distribution is inconsequential
                if (transtype(j) == 1) || (transtype(j) == 2) % if contribution or distribution (i.e., skip all valuations)
                    cntCF                       = cntCF + 1;
                    PEfund_CF(cnt,cntCF)        = transamt(j);     
                    PEfund_datenums(cnt,cntCF)  = transdatenum(j);
                    
                    % Compute time since fund inception (in years)
                    PEfund_T(cnt,cntCF)         = (transdatenum(j) - transdatenum(1))/365;
                    
                    % separate contributions and distributions (only used for computing TVPI and PME)
                    if (transtype(j) == 1)  % contribution 
                    	contr(cntCF)            = -transamt(j); 
                    else                    % distribution (will include final NAV if includeNAV==1)
                        if transamt(j) > 0  % normal distribution
                            distr(cntCF)        = transamt(j); 
                        else                % negative distribution, should be considered a contribution (this pulls TVPI and PME towards 1)
                            contr(cntCF)        = -transamt(j); 
                        end
                    end
                    
                    % Compute the matching factor returns between fund firstdate and transaction date
                    IIstrt = find(FF_datenum == transdatenum(1)); 
                    if isempty(IIstrt)  % if no exact date match, find closest prior or next date in Fama-French data
                        klag = 1 ; while isempty(find(FF_datenum == transdatenum(1)-klag, 1)) , klag = klag + 1;   end
                        klead = 1; while isempty(find(FF_datenum == transdatenum(1)+klead, 1)), klead = klead + 1; end
                        if klag < klead
                            IIstrt  = find(FF_datenum == transdatenum(1)-klag);     % go before
                        else
                            IIstrt  = find(FF_datenum == transdatenum(1)+klead);    % go after
                        end
                    end
                    IIend  = find(FF_datenum == transdatenum(j));
                    if isempty(IIend)  % if no exact date match, find closest prior or next date in Fama-French data
                        klag = 1 ; while isempty(find(FF_datenum == transdatenum(j)-klag, 1)) , klag = klag + 1;   end
                        klead = 1; while isempty(find(FF_datenum == transdatenum(j)+klead, 1)), klead = klead + 1; end
                        if klag < klead
                            IIend   = find(FF_datenum == transdatenum(j)-klag);     % go before
                        else
                            IIend   = find(FF_datenum == transdatenum(j)+klead);    % go after
                        end
                    end
                    PEfund_Rm(cnt,cntCF)       = prod(1+FF_rm(IIstrt+1:IIend));
                    PEfund_IIstrt(cnt,cntCF)   = IIstrt;
                    PEfund_IIend(cnt,cntCF)    = IIend;
                    PEfund_Rsmb(cnt,cntCF)     = prod(1+FF_rSMB(IIstrt+1:IIend));
                    PEfund_RS2(cnt,cntCF)      = prod(1+FF_rf(IIstrt+1:IIend)+FF_rSMB(IIstrt+1:IIend));
                    PEfund_RH2(cnt,cntCF)      = prod(1+FF_rf(IIstrt+1:IIend)+FF_rHML(IIstrt+1:IIend));
                    PEfund_Rhml(cnt,cntCF)     = prod(1+FF_rHML(IIstrt+1:IIend));
                    PEfund_Rrmw(cnt,cntCF)     = prod(1+FF_rRMW(IIstrt+1:IIend));
                    PEfund_Rcma(cnt,cntCF)     = prod(1+FF_rCMA(IIstrt+1:IIend));
                    PEfund_Rf(cnt,cntCF)       = prod(1+FF_rf(IIstrt+1:IIend));             
                                                                                      
                    PEfund_Rmvol(cnt,cntCF)    = std(FF_rm(IIstrt+1:IIend));

                    if (IIstrt < IIend) 
                        PEfund_Rmvar(cnt,cntCF) = var(FF_rm(IIstrt+1:IIend)) * PEfund_T(cnt,cntCF) * 250;
                    else
                        PEfund_Rmvar(cnt,cntCF) = 0;
                    end                    
                    
                    IIstrt = find(FF6_datenum == transdatenum(1)); 
                    if isempty(IIstrt)  % if no exact date match, find closest prior or next date in Fama-French data
                        klag = 1 ; while isempty(find(FF6_datenum == transdatenum(1)-klag, 1)) , klag = klag + 1;   end
                        klead = 1; while isempty(find(FF6_datenum == transdatenum(1)+klead, 1)), klead = klead + 1; end
                        if klag < klead
                            IIstrt  = find(FF6_datenum == transdatenum(1)-klag);     % go before
                        else
                            IIstrt  = find(FF6_datenum == transdatenum(1)+klead);    % go after
                        end
                    end
                    IIend  = find(FF6_datenum == transdatenum(j));
                    if isempty(IIend)
                        klag = 1 ; while isempty(find(FF6_datenum == transdatenum(j)-klag, 1)) , klag = klag + 1;   end
                        klead = 1; while isempty(find(FF6_datenum == transdatenum(j)+klead, 1)), klead = klead + 1; end
                        if klag < klead
                            IIend   = find(FF6_datenum == transdatenum(j)-klag);     % go before
                        else
                            IIend   = find(FF6_datenum == transdatenum(j)+klead);    % go after
                        end
                    end
                    PEfund_RSL(cnt,cntCF)      = prod(1+FF6_SL(IIstrt+1:IIend));
                    PEfund_RSM(cnt,cntCF)      = prod(1+FF6_SM(IIstrt+1:IIend));
                    PEfund_RSH(cnt,cntCF)      = prod(1+FF6_SH(IIstrt+1:IIend));
                    PEfund_RS(cnt,cntCF)       = prod(1+(FF6_SL(IIstrt+1:IIend)+FF6_SM(IIstrt+1:IIend)+FF6_SH(IIstrt+1:IIend))/3 );
                    PEfund_RB(cnt,cntCF)       = prod(1+(FF6_BL(IIstrt+1:IIend)+FF6_BM(IIstrt+1:IIend)+FF6_BH(IIstrt+1:IIend))/3 );
                    PEfund_RBL(cnt,cntCF)      = prod(1+FF6_BL(IIstrt+1:IIend));
                    PEfund_RBM(cnt,cntCF)      = prod(1+FF6_BM(IIstrt+1:IIend));
                    PEfund_RBH(cnt,cntCF)      = prod(1+FF6_BH(IIstrt+1:IIend));   
                    PEfund_RH(cnt,cntCF)       = prod(1+(FF6_SH(IIstrt+1:IIend)+FF6_BH(IIstrt+1:IIend))/2 );
                    PEfund_RL(cnt,cntCF)       = prod(1+(FF6_SL(IIstrt+1:IIend)+FF6_BL(IIstrt+1:IIend))/2 );
                    
                    IIstrt = find(FF6MEOP_datenum == transdatenum(1)); 
                    if isempty(IIstrt)  % if no exact date match, find closest prior or next date in Fama-French data
                        klag = 1 ; while isempty(find(FF6MEOP_datenum == transdatenum(1)-klag, 1)) , klag = klag + 1;   end
                        klead = 1; while isempty(find(FF6MEOP_datenum == transdatenum(1)+klead, 1)), klead = klead + 1; end
                        if klag < klead
                            IIstrt  = find(FF6MEOP_datenum == transdatenum(1)-klag);     % go before
                        else
                            IIstrt  = find(FF6MEOP_datenum == transdatenum(1)+klead);    % go after
                        end
                    end
                    IIend  = find(FF6MEOP_datenum == transdatenum(j));
                    if isempty(IIend)
                        klag = 1 ; while isempty(find(FF6MEOP_datenum == transdatenum(j)-klag, 1)) , klag = klag + 1;   end
                        klead = 1; while isempty(find(FF6MEOP_datenum == transdatenum(j)+klead, 1)), klead = klead + 1; end
                        if klag < klead
                            IIend   = find(FF6MEOP_datenum == transdatenum(j)-klag);     % go before
                        else
                            IIend   = find(FF6MEOP_datenum == transdatenum(j)+klead);    % go after
                        end
                    end             
                    PEfund_RSLop(cnt,cntCF) = prod(1+FF6_SLop(IIstrt+1:IIend));
                    PEfund_RSMop(cnt,cntCF) = prod(1+FF6_SMop(IIstrt+1:IIend));
                    PEfund_RSHop(cnt,cntCF) = prod(1+FF6_SHop(IIstrt+1:IIend));
                    PEfund_RBLop(cnt,cntCF) = prod(1+FF6_BLop(IIstrt+1:IIend));
                    PEfund_RBMop(cnt,cntCF) = prod(1+FF6_BMop(IIstrt+1:IIend));
                    PEfund_RBHop(cnt,cntCF) = prod(1+FF6_BHop(IIstrt+1:IIend));
                    PEfund_RHop(cnt,cntCF)  = prod(1+(FF6_SHop(IIstrt+1:IIend)+FF6_BHop(IIstrt+1:IIend))/2 );         
                    PEfund_RLop(cnt,cntCF)  = prod(1+(FF6_SLop(IIstrt+1:IIend)+FF6_BLop(IIstrt+1:IIend))/2 );   
                                              
                end % if (transtype = 1 or 2)                         
            end % for j

            % Count # unique net cash flows
            dn = PEfund_datenums(cnt,:);
            PEfund_numCF(cnt)       = length(unique(dn(dn>0)));
            
            % Determine liquidation status
            PEfund_liq100(cnt,1)    = 0;
            PEfund_liq95(cnt,1)     = 0;
            PEfund_liq90(cnt,1)     = 0;   
            if PEfund_vintage(cnt,1) < 2009 % any fund less than 10y old will not be liquidated yet
                if (PEfund_finalNAV(cnt) == 0)
                    PEfund_liq100(cnt,1) = 1;   % fund is fully liquidated
                end
                if ((PEfund_finalNAV(cnt)/1000000) / PEfund_size(cnt) <= .05)	% NB: PEfund_size is in $m
                    PEfund_liq95(cnt,1) = 1; 
                end
                if ((PEfund_finalNAV(cnt)/1000000) / PEfund_size(cnt) <= .1)
                    PEfund_liq90(cnt,1) = 1;
                end
            end
            
            % compute TVPI (don't use CF because a negative distribution is not a capital call)
            PEfund_TVPI(cnt) = sum(distr) / sum(contr);  % NB: Final NAV is included in distr (if includeNAV==1), so don't add it again
                        
            % compute PME (don't use CF because a negative distribution is not a capital call)
            discdistr = distr(1:cntCF)./PEfund_Rm(cnt,1:cntCF);  % NB: Final NAV is included in distr (if includeNAV==1), so don't add it again
            disccontr = contr(1:cntCF)./PEfund_Rm(cnt,1:cntCF);
            PEfund_PME(cnt) = sum(discdistr) / sum(disccontr); 
            
            % compute IRR
            startIRR = [5; 2; 1; .5; .4; .3; .2; .1; 0; -.1; -.2; -.3; -.4; -.5];	% try different starting values;
            curmin = 100000000;
            options = optimset; options = optimset(options, 'display', 'off');
            PEfund_IRR(cnt) = NaN;
            for j = 1:length(startIRR)
                try
                    [IRR_try, fminval, exitflag] = fzero(@(x) sum(PEfund_CF(cnt,:)./((1+x).^min(10,PEfund_T(cnt,:)))), startIRR(j), options);    % the min(10,PEfund_T) is because this is how Preqin calculates IRRs, but is nominally wrong!!    
                catch
                end
                if (abs(fminval) < curmin) && (exitflag > 0)
                    PEfund_IRR(cnt) = IRR_try*100;
                    curmin = fminval;
                end
            end   
            if (sum(contr) > 0) && (sum(distr) == 0)
                PEfund_IRR(cnt) = -100;
            end            
            
        else    % fund is empty: no valid transaction data
            disp(['Fund #' num2str(inCF_fundid(strti)) ' is empty or has no NAV: Not included in sample']);
        end  % if ~isempty(transdatenum)
    end % if (isVC II isBO) && ...    
end % outer while loop


% Delete trailing (unused) zero rows and columns
varlist = who('PEfund_*','R*');
lastrow = length(PEfund_fundid); while (PEfund_fundid(lastrow) == 0), lastrow = lastrow - 1; end
lastcol = size(PEfund_CF,2); while (sum(PEfund_CF(:,lastcol)==0) == maxfunds), lastcol = lastcol - 1; end
for i = 1:length(varlist)
    eval([varlist{i} '(:,lastcol+1:end,:) = [];']);
    eval([varlist{i} '(lastrow+1:end,:,:) = [];']);
end


% Construct fund sequence numbers for each GP in the data, and characteristics of the next fund by the same GP
% Assign sequence number within fund type (VC vs buyout) but don't care whether funds qualify or not on other dimensions (e.g. size, location)
% Thus, the final data set does not necessarily contain each number of a GP's sequence!
for i = 1:length(PEfund_fundid)
    % Try to find data for the next fund of the same firm (used for regressions below)            
    II = find(inCF_firmid == PEfund_firmid(i)); % returns empty set if PEfund_firmid(i) = NaN
    if ~isempty(II)
        nextset = [inCF_vintage(II) inCF_fundid(II) II inCF_size(II)/1000000]; 
        nexttype = zeros(length(II),1);
        for j = 1:length(II)
            if strcmpi(inCF_class(II(j)), 'Venture Capital'), nexttype(j) = 1; end
            if strcmpi(inCF_class(II(j)), 'Buyout')         , nexttype(j) = 2; end
            if strcmpi(inCF_class(II(j)), 'Expansion Capital'), nexttype(j) = 3; end
        end
        nextset = nextset(nexttype==PEfund_type(i), :);     % only keep funds of the same type  
        [~,II] = unique(nextset(:,2));                      % only keep one record for each fund
        nextset = nextset(II,:);
        nextset = sortrows(nextset,1);                      % sort by vintageyear   
        nextset = [nextset [1:size(nextset,1)]'];           % add sequence numbers
        % determine the present fund's sequence number
        IIcur = find(nextset(:,2) == PEfund_fundid(i));    % find current fund within nextset
        PEfund_seqnr(i) = nextset(IIcur,end);
        % now determine next fund characteristics (for regressions below)
        IIcur = IIcur + 1; done = false;
        while ~done
            if (IIcur > size(nextset,1))
                done = true;
            else
                if strcmp(inCF_subfocus(nextset(IIcur,3)),'United States')   % skip non-US: could be different team/skill 
                    PEfund_nextfundid(i)    = nextset(IIcur,2);
                    PEfund_nextvintage(i)   = nextset(IIcur,1);
                    PEfund_nextsize(i)      = nextset(IIcur,4);
                    PEfund_nextsize1990(i)  = PEfund_nextsize(i) * CPI(CPI_date==1990) / CPI(CPI_date == PEfund_nextvintage(i)); 
                    done = true;
                end
                IIcur = IIcur + 1;
            end
        end
    end
end


% Save a temporary local data file (NOT TO BE SENT BACK, WILL BE DELETED)
save ('temp', 'PEfund*', 'includeNAV');

toc


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Produce Summary Statistics 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

disp(' ')
disp('Compute summary statistics')
disp(' ')

for liq = 0:3   % repeat tables for each liquidation status
    
    disp('***************************************************************')
    switch liq
        case 0, s = '';        d = 'All funds';             disp(['* ' d]); IIliq = ones(size(PEfund_vintage))>0;
        case 1, s = '_liq100'; d = '100% Liquidated funds'; disp(['* ' d]); IIliq = (PEfund_liq100 == 1);
        case 2, s = '_liq95';  d = '95% Liquidated funds';  disp(['* ' d]); IIliq = (PEfund_liq95 == 1);
        case 3, s = '_liq90';  d = '90% Liquidated funds';  disp(['* ' d]); IIliq = (PEfund_liq90 == 1);
    end
    disp('***************************************************************')
    disp(' ')

    stats_years = (min(PEfund_vintage):max(PEfund_vintage))';
    eval(['stats_numfunds' s '          = ones(length(stats_years)+6,6)*NaN;']);
    eval(['stats_numfirms' s '          = ones(length(stats_years)+6,6)*NaN;']);
    eval(['stats_fundsperfirm' s '      = ones(length(stats_years)+6,9,6)*NaN;']);
    eval(['stats_missingfirms' s '      = ones(length(stats_years)+6,6)*NaN;']);
    eval(['stats_numCFperfund' s '      = ones(length(stats_years)+6,9,6)*NaN;']);
    eval(['stats_fundsize' s '          = ones(length(stats_years)+6,9,6)*NaN;']);
    eval(['stats_effectiveT' s '        = ones(length(stats_years)+6,9,6)*NaN;']);
    eval(['stats_TVPI' s '              = ones(length(stats_years)+6,11,6)*NaN;']);
    eval(['stats_IRR' s '               = ones(length(stats_years)+6,11,6)*NaN;']);
    eval(['stats_PME' s '               = ones(length(stats_years)+6,11,6)*NaN;']);
    eval(['stats_TVPIwtd' s '           = ones(length(stats_years)+6,2,6)*NaN;']);
    eval(['stats_IRRwtd' s '            = ones(length(stats_years)+6,2,6)*NaN;']);
    eval(['stats_PMEwtd' s '            = ones(length(stats_years)+6,2,6)*NaN;']);
    
    % Compute statistics (# funds, # firms, TVPI, etc) by vintage year, and across all years
    for i = 1:length(stats_years)+6

        % Construct matrix with # funds by vintage year (rows) and type (columns: VC_all BO GE)
        % first row is <=1980, second row is <=1985
        % final 4 rows are by decade (<1990, 1990-99, 2000-10) and across all years, respectively.
        if i == 1
            II = IIliq & (PEfund_vintage <= 1980);
        elseif i == 2
            II = IIliq & (PEfund_vintage <= 1985);
        elseif i <= length(stats_years)+2
            II = IIliq & (PEfund_vintage == stats_years(i-2));
        elseif i == length(stats_years)+3
            II = IIliq & (PEfund_vintage <= 1989);
        elseif i == length(stats_years)+4
            II = IIliq & (PEfund_vintage >= 1990) & (PEfund_vintage <= 1999);
        elseif i == length(stats_years)+5
            II = IIliq & (PEfund_vintage >= 2000);
        elseif i == length(stats_years)+6
            II = IIliq & (ones(size(PEfund_vintage))>0);    % across all years
        end
        
        eval(['stats_numfunds' s '(i,:) = [sum(PEfund_type(II)==1) sum(PEfund_type(II)==1 & PEfund_subtype(II)==1) sum(PEfund_type(II)==1 & PEfund_subtype(II)==2) sum(PEfund_type(II)==1 & PEfund_subtype(II)==3) sum(PEfund_type(II)==2) sum(PEfund_type(II)==3)];']);

        % Matrix with # firms (same structure as stats_numfunds)
        eval(['stats_numfirms' s '(i,:) = [sum(~isnan(unique(PEfund_firmid(II & PEfund_type==1)))) sum(~isnan(unique(PEfund_firmid(II & PEfund_type==1 & PEfund_subtype==1)))) '...
            'sum(~isnan(unique(PEfund_firmid(II & PEfund_type==1 & PEfund_subtype==2)))) sum(~isnan(unique(PEfund_firmid(II & PEfund_type==1 & PEfund_subtype==3)))) sum(~isnan(unique(PEfund_firmid(II & PEfund_type==2)))) sum(~isnan(unique(PEfund_firmid(II & PEfund_type==3))))];']); 

        % Summary stats of # funds per firm
        % 3D Matrix with vintage year first, stats second ([mean, stdev, min, p10, p25, median, p75, p90, max]), and type/subtype third ([VC_all VC_balanced VC_early VC_late BO GE])
        % first row is <=1980, second row is <=1985
        % final 4 rows are by decade (<1990, 1990-99, 2000-10) and across all years, respectively.
        firmid = unique(PEfund_firmid(II & PEfund_type==1)); firmid(isnan(firmid)) = []; % list of unique firms, not counting missing firmid (= NaN)
        fpf = zeros(length(firmid),1); for j = 1:length(firmid), fpf(j) = sum(PEfund_firmid(II & PEfund_type==1) == firmid(j)); end   % count # funds per firm
        if ~isempty(fpf), eval(['stats_fundsperfirm' s '(i,:,1) = [mean(fpf) std(fpf) min(fpf) prctile(fpf,[10 25 50 75 90]) max(fpf)];']); end
        for k = 1:3
            firmid = unique(PEfund_firmid(II & PEfund_type==1 & PEfund_subtype==k)); firmid(isnan(firmid)) = []; % list of unique firms, not counting missing firmid (= NaN)
            fpf = zeros(length(firmid),1); for j = 1:length(firmid), fpf(j) = sum(PEfund_firmid(II & PEfund_type==1 & PEfund_subtype==k) == firmid(j)); end   % count # funds per firm
            if ~isempty(fpf), eval(['stats_fundsperfirm' s '(i,:,k+1) = [mean(fpf) std(fpf) min(fpf) prctile(fpf,[10 25 50 75 90]) max(fpf)];']); end
        end    
        firmid = unique(PEfund_firmid(II & PEfund_type==2)); firmid(isnan(firmid)) = []; % list of unique firms, not counting missing firmid (= NaN)
        fpf = zeros(length(firmid),1); for j = 1:length(firmid), fpf(j) = sum(PEfund_firmid(II & PEfund_type==2) == firmid(j)); end   % count # funds per firm
        if ~isempty(fpf), eval(['stats_fundsperfirm' s '(i,:,5) = [mean(fpf) std(fpf) min(fpf) prctile(fpf,[10 25 50 75 90]) max(fpf)];']); end
        firmid = unique(PEfund_firmid(II & PEfund_type==3)); firmid(isnan(firmid)) = []; % list of unique firms, not counting missing firmid (= NaN)
        fpf = zeros(length(firmid),1); for j = 1:length(firmid), fpf(j) = sum(PEfund_firmid(II & PEfund_type==3) == firmid(j)); end   % count # funds per firm
        if ~isempty(fpf), eval(['stats_fundsperfirm' s '(i,:,6) = [mean(fpf) std(fpf) min(fpf) prctile(fpf,[10 25 50 75 90]) max(fpf)];']); end
        % Count # funds with missing GP identifiers
        eval(['stats_missingfirms' s '(i,:) = [sum(isnan(PEfund_firmid(II & PEfund_type==1))) sum(isnan(PEfund_firmid(II & PEfund_type==1 & PEfund_subtype==1))) '...
            'sum(isnan(PEfund_firmid(II & PEfund_type==1 & PEfund_subtype==2))) sum(isnan(PEfund_firmid(II & PEfund_type==1 & PEfund_subtype==3))) sum(isnan(PEfund_firmid(II & PEfund_type==2))) sum(isnan(PEfund_firmid(II & PEfund_type==3)))];']); 
        
        % # Cash flows per fund (same structure as stats_fundsperfirm)
        x = PEfund_numCF(II & PEfund_type==1);    if ~isempty(x), eval(['stats_numCFperfund' s '(i,:,1) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_numCF(II & PEfund_type==1 & PEfund_subtype==1); if ~isempty(x), eval(['stats_numCFperfund' s '(i,:,2) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_numCF(II & PEfund_type==1 & PEfund_subtype==2); if ~isempty(x), eval(['stats_numCFperfund' s '(i,:,3) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_numCF(II & PEfund_type==1 & PEfund_subtype==3); if ~isempty(x), eval(['stats_numCFperfund' s '(i,:,4) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_numCF(II & PEfund_type==2);    if ~isempty(x), eval(['stats_numCFperfund' s '(i,:,5) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_numCF(II & PEfund_type==3);    if ~isempty(x), eval(['stats_numCFperfund' s '(i,:,6) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end

        x = PEfund_size(II & PEfund_type==1);    if ~isempty(x), eval(['stats_fundsize' s '(i,:,1) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_size(II & PEfund_type==1 & PEfund_subtype==1); if ~isempty(x), eval(['stats_fundsize' s '(i,:,2) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_size(II & PEfund_type==1 & PEfund_subtype==2); if ~isempty(x), eval(['stats_fundsize' s '(i,:,3) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_size(II & PEfund_type==1 & PEfund_subtype==3); if ~isempty(x), eval(['stats_fundsize' s '(i,:,4) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_size(II & PEfund_type==2);    if ~isempty(x), eval(['stats_fundsize' s '(i,:,5) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_size(II & PEfund_type==3);    if ~isempty(x), eval(['stats_fundsize' s '(i,:,6) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end

        x = max(PEfund_T(II & PEfund_type==1,:),[],2);    if ~isempty(x), eval(['stats_effectiveT' s '(i,:,1) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = max(PEfund_T(II & PEfund_type==1 & PEfund_subtype==1,:),[],2); if ~isempty(x), eval(['stats_effectiveT' s '(i,:,2) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = max(PEfund_T(II & PEfund_type==1 & PEfund_subtype==2,:),[],2); if ~isempty(x), eval(['stats_effectiveT' s '(i,:,3) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = max(PEfund_T(II & PEfund_type==1 & PEfund_subtype==3,:),[],2); if ~isempty(x), eval(['stats_effectiveT' s '(i,:,4) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = max(PEfund_T(II & PEfund_type==2,:),[],2);    if ~isempty(x), eval(['stats_effectiveT' s '(i,:,5) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = max(PEfund_T(II & PEfund_type==3,:),[],2);    if ~isempty(x), eval(['stats_effectiveT' s '(i,:,6) = [mean(x) std(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end

        x = PEfund_TVPI(II & PEfund_type==1);    if ~isempty(x), eval(['stats_TVPI' s '(i,:,1) = [mean(x) std(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_TVPI(II & PEfund_type==1 & PEfund_subtype==1); if ~isempty(x), eval(['stats_TVPI' s '(i,:,2) = [mean(x) std(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_TVPI(II & PEfund_type==1 & PEfund_subtype==2); if ~isempty(x), eval(['stats_TVPI' s '(i,:,3) = [mean(x) std(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_TVPI(II & PEfund_type==1 & PEfund_subtype==3); if ~isempty(x), eval(['stats_TVPI' s '(i,:,4) = [mean(x) std(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_TVPI(II & PEfund_type==2);    if ~isempty(x), eval(['stats_TVPI' s '(i,:,5) = [mean(x) std(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_TVPI(II & PEfund_type==3);    if ~isempty(x), eval(['stats_TVPI' s '(i,:,6) = [mean(x) std(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end

        x = PEfund_TVPI(II & PEfund_type==1);    w = PEfund_size(II & PEfund_type==1);    if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_TVPIwtd' s '(i,1:2,1) = [m st];']); end
        x = PEfund_TVPI(II & PEfund_type==1 & PEfund_subtype==1); w = PEfund_size(II & PEfund_type==1 & PEfund_subtype==1); if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_TVPIwtd' s '(i,1:2,2) = [m st];']); end
        x = PEfund_TVPI(II & PEfund_type==1 & PEfund_subtype==2); w = PEfund_size(II & PEfund_type==1 & PEfund_subtype==2); if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_TVPIwtd' s '(i,1:2,3) = [m st];']); end
        x = PEfund_TVPI(II & PEfund_type==1 & PEfund_subtype==3); w = PEfund_size(II & PEfund_type==1 & PEfund_subtype==3); if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_TVPIwtd' s '(i,1:2,4) = [m st];']); end
        x = PEfund_TVPI(II & PEfund_type==2);    w = PEfund_size(II & PEfund_type==2);    if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_TVPIwtd' s '(i,1:2,5) = [m st];']); end
        x = PEfund_TVPI(II & PEfund_type==3);    w = PEfund_size(II & PEfund_type==3);    if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_TVPIwtd' s '(i,1:2,6) = [m st];']); end
        
        x = PEfund_IRR(II & PEfund_type==1);    if ~isempty(x), eval(['stats_IRR' s '(i,:,1) = [nanmean(x) nanstd(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_IRR(II & PEfund_type==1 & PEfund_subtype==1); if ~isempty(x), eval(['stats_IRR' s '(i,:,2) = [nanmean(x) nanstd(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_IRR(II & PEfund_type==1 & PEfund_subtype==2); if ~isempty(x), eval(['stats_IRR' s '(i,:,3) = [nanmean(x) nanstd(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_IRR(II & PEfund_type==1 & PEfund_subtype==3); if ~isempty(x), eval(['stats_IRR' s '(i,:,4) = [nanmean(x) nanstd(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_IRR(II & PEfund_type==2);    if ~isempty(x), eval(['stats_IRR' s '(i,:,5) = [nanmean(x) nanstd(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_IRR(II & PEfund_type==3);    if ~isempty(x), eval(['stats_IRR' s '(i,:,6) = [nanmean(x) nanstd(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end

        x = PEfund_IRR(II & PEfund_type==1 & ~isnan(PEfund_IRR));    w = PEfund_size(II & PEfund_type==1 & ~isnan(PEfund_IRR));    if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_IRRwtd' s '(i,1:2,1) = [m st];']); end
        x = PEfund_IRR(II & PEfund_type==1 & PEfund_subtype==1 & ~isnan(PEfund_IRR)); w = PEfund_size(II & PEfund_type==1 & PEfund_subtype==1 & ~isnan(PEfund_IRR)); if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_IRRwtd' s '(i,1:2,2) = [m st];']); end
        x = PEfund_IRR(II & PEfund_type==1 & PEfund_subtype==2 & ~isnan(PEfund_IRR)); w = PEfund_size(II & PEfund_type==1 & PEfund_subtype==2 & ~isnan(PEfund_IRR)); if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_IRRwtd' s '(i,1:2,3) = [m st];']); end
        x = PEfund_IRR(II & PEfund_type==1 & PEfund_subtype==3 & ~isnan(PEfund_IRR)); w = PEfund_size(II & PEfund_type==1 & PEfund_subtype==3 & ~isnan(PEfund_IRR)); if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_IRRwtd' s '(i,1:2,4) = [m st];']); end
        x = PEfund_IRR(II & PEfund_type==2 & ~isnan(PEfund_IRR));    w = PEfund_size(II & PEfund_type==2 & ~isnan(PEfund_IRR));    if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_IRRwtd' s '(i,1:2,5) = [m st];']); end      
        x = PEfund_IRR(II & PEfund_type==3 & ~isnan(PEfund_IRR));    w = PEfund_size(II & PEfund_type==3 & ~isnan(PEfund_IRR));    if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_IRRwtd' s '(i,1:2,6) = [m st];']); end      
        
        x = PEfund_PME(II & PEfund_type==1);    if ~isempty(x), eval(['stats_PME' s '(i,:,1) = [mean(x) std(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_PME(II & PEfund_type==1 & PEfund_subtype==1); if ~isempty(x), eval(['stats_PME' s '(i,:,2) = [mean(x) std(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_PME(II & PEfund_type==1 & PEfund_subtype==2); if ~isempty(x), eval(['stats_PME' s '(i,:,3) = [mean(x) std(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_PME(II & PEfund_type==1 & PEfund_subtype==3); if ~isempty(x), eval(['stats_PME' s '(i,:,4) = [mean(x) std(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_PME(II & PEfund_type==2);    if ~isempty(x), eval(['stats_PME' s '(i,:,5) = [mean(x) std(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        x = PEfund_PME(II & PEfund_type==3);    if ~isempty(x), eval(['stats_PME' s '(i,:,6) = [mean(x) std(x) skewness(x) kurtosis(x) min(x) prctile(x, [10 25 50 75 90]) max(x)];']); end
        
        x = PEfund_PME(II & PEfund_type==1);    w = PEfund_size(II & PEfund_type==1);    if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_PMEwtd' s '(i,1:2,1) = [m st];']); end
        x = PEfund_PME(II & PEfund_type==1 & PEfund_subtype==1); w = PEfund_size(II & PEfund_type==1 & PEfund_subtype==1); if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_PMEwtd' s '(i,1:2,2) = [m st];']); end
        x = PEfund_PME(II & PEfund_type==1 & PEfund_subtype==2); w = PEfund_size(II & PEfund_type==1 & PEfund_subtype==2); if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_PMEwtd' s '(i,1:2,3) = [m st];']); end
        x = PEfund_PME(II & PEfund_type==1 & PEfund_subtype==3); w = PEfund_size(II & PEfund_type==1 & PEfund_subtype==3); if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_PMEwtd' s '(i,1:2,4) = [m st];']); end
        x = PEfund_PME(II & PEfund_type==2);    w = PEfund_size(II & PEfund_type==2);    if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_PMEwtd' s '(i,1:2,5) = [m st];']); end
        x = PEfund_PME(II & PEfund_type==3);    w = PEfund_size(II & PEfund_type==3);    if ~isempty(x), m = x'*w/sum(w); st = sqrt( w'*(x-m).^2 / ((length(w)-1) * mean(w)) ); eval(['stats_PMEwtd' s '(i,1:2,6) = [m st];']); end
        
    end % loop over years
end % for liq
    

% save summary stats in output folder
save([outdir 'Burgiss_sumstats_' datestr(date,'yyyymmdd')], 'stats_*');


% diary off
% break


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Estimate fund-level (G)PME and alphas 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

tic

disp(' ')
disp('Estimate performance metrics')
disp(' ')

for samp = [1:3]    
    % samp refers to different assumptions about asset classes, liquidation returns, subperiods, etc.
    % It can take on the following values:
    %   1: all PE funds

    
    disp(' '); disp(' '); disp(' ')
    disp('***************************************************************')
    disp(['Processing sample #' num2str(samp)])
    disp('***************************************************************')
    
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Load data and implement a few selection criteria
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

    % start with fresh input dataset
    clearvars PEfund_* stats_* %nextperf_*
    load 'temp'
           
    % Select funds based on fund type
    II = (PEfund_type == 1 | PEfund_type == 2);   % use all VC and buyout funds
    varlist = whos('PEfund_*'); for i = 1:length(varlist), eval([varlist(i).name ' = ' varlist(i).name '(II>0,:,:);']); end   
    
    % construct numcf, an NxJ matrix that has the number of cash flows for each fund in every row (counting duplicate dates, so different from PEfund_numCF!).
    numcf = repmat(sum(PEfund_Rm > 0, 2), 1, size(PEfund_Rf,2)); 

    % choose which portfolios to use
    Rf = PEfund_Rf;
    Rm = PEfund_Rm;

    % Compute max times for use in GMM criterion function
    firstdates = PEfund_datenums(:,1);
    lastdates = max(PEfund_datenums,[],2);
    hi = [max(PEfund_T')' PEfund_datenums(:,1) max(PEfund_datenums,[],2)]; 

    
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Generate replicating funds for estimation of SDF parameters
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    
    %normalize to $1 discounted capital calls
    q = ones(1,size(PEfund_CF,2)); 
    ind = (PEfund_CF>=0); 
    negcf = PEfund_CF./Rf; 
    negcf(ind) = 0; 
    cfnorm = -sum(negcf,2)*q; 
    cf  = PEfund_CF./cfnorm;
            

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
    % Log-utility CAPM (i.e., "traditional" PME)
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
    % Include mkt return and Rf here, too -- are irrelevant, but otherwise
    % code for fundMCexpaff function does not work due to two singleton 
    % dimensions Matlab does not know which one to squeeze in this case 
    r = zeros(size(Rm,1), 3, size(Rm,2)); 
    r(:,3,:) = cf;
    r(:,1,:) = ReplicateCF(cf, Rf, PEfund_T);    
    r(:,2,:) = ReplicateCF(cf, Rm, PEfund_T);       
    f = zeros(size(Rm,1), 2, size(Rm,2));
    f(:,1,:) = PEfund_T;
    f(:,2,:) = log(Rm);     
    prc = [ones(size(r,1), size(r,2)-1) zeros(size(r,1), 1)]; % expected sum of discounted fund cash flows should equal zero, while expected sum of discounted market returns should equal the # of returns (same for rf)
    W = eye(3);
    W(3,3) = 0.00001; 
    aJ = [0; 0; 1];
    
    [~, Jfix, ~, bvar, ~, pJopt, pJfix, gt] = GMMcrit(@fundMCexpaff, [0; -1], f, r, prc, 1, hi, @Smatdatenums, aJ); 
    J_logucapm = Jfix  
    pJ_logucapm = pJfix  
    PMEpre = gt(:,3);           % PME based on actual data (before bootstrap simulation)
    PME = mean(gt(:,3)) 
    se_logucapm = sqrt(PME^2/Jfix)
    
    
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
    % Estimate GPME models with GMM
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Choose set of SDF factors (each row is a model)
    factor_set  = {{'log(Rm)'}}; %...

    % Choose set of assets to fit SDF to (each row is a model)
    fit_set     = {{'Rf','Rm'}};

    % Choose riskfree rate (needed for estimating betas; each row is a model)
    logRf_set = {'log(Rf)'};

    label_set = {{'CAPM', 'capm'}};
            
    options = optimset('TolFun',1.0e-18,'MaxIter',2500,'TolX',1.0e-18, ...
                       'MaxFunEvals',100000, 'Display', 'off');

    PMEi_save = NaN(size(cf,1),length(fit_set),6);                   
    alphai_save = NaN(size(cf,1),length(fit_set),6);
    GPMEi_save = NaN(size(cf,1),length(fit_set),6);
    PMEtildei_save = NaN(size(cf,1),length(fit_set),6);     % true pricing error

    fundbeta_save = NaN(size(cf,1),length(fit_set),6); 
    
    for fm = 1:length(fit_set)   % cycle through models
        factors = factor_set{fm};
        fit = fit_set{fm};
        labels = label_set{fm};

        disp('*****************************************************************************')
        disp(['GPME Model ' num2str(fm) ' (' labels{1} ')'])
        fs = factors{1}; for j = 2:length(factors), fs = [fs ', ' factors{j}]; end    
        disp(['factors  : ' fs])
        fs = fit{1}; for j = 2:length(fit), fs = [fs ', ' fit{j}]; end    
        disp(['fitted to: ' fs])
        disp('*****************************************************************************')

        for truebeta = 1:6
            disp('-----------------------------------------------------------------------------')
            disp(['True (mu) beta = ' num2str(truebeta/2)]);
            disp('-----------------------------------------------------------------------------')

            % Generate replicating funds for estimation of SDF parameters (and include VC cash flows as final asset)
            numr = length(fit)+1;
            r = zeros(size(cf,1),numr,size(cf,2)); 
            for j = 1:length(fit)
                eval(['r(:,j,:) = ReplicateCF(cf, ' fit{j} ', PEfund_T);']);
            end
            r(:,numr,:) = cf; 


            %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
            % Bootstrap fund data set
            %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
            
            rng(1);
            rnd_funddraw = randi(size(PEfund_fundid,1), 10000,1);   % draw random funds (pre-draw here so we are drawing the same funds in each iteration of "samp"
            
            % construct levered market portfolio based on true beta
            Rmlev = zeros(size(PEfund_CF));
            switch samp
                case 1, sig_beta = 0;
                case 2, sig_beta = 0.25;
                case 3, sig_beta = 1;
            end
            fundbeta = truebeta/2 + randn(size(Rmlev,1),1)*sig_beta;
            for i = 1:size(Rmlev,1)
                for j = 1:size(Rmlev,2)
                    if ~isnan(PEfund_IIstrt(i,j)) && ~isnan(PEfund_IIend(i,j))
                        IIstrt = PEfund_IIstrt(i,j);
                        IIend = PEfund_IIend(i,j);
                        Rmlev(i,j) = prod(1 + FF_rf(IIstrt+1:IIend) + fundbeta(i)*(FF_rm(IIstrt+1:IIend) - FF_rf(IIstrt+1:IIend)) );     % cumulative return on Rf + (truebeta/2)*(Rm-Rf)
                    end
                end
            end            
            
            r(:,numr,:) = ReplicateCF(cf, Rmlev, PEfund_T);        % 3rd dimension of PEfund_Rmlev is the beta, from 0.5 to 3 in increments of 0.5

            % for each fund, add a PME-tilde from a non-overlapping fund    
            rng(1);
            PMEtilde = zeros(size(PEfund_fundid,1),1);    % keep track of what PME was added to each fund; this is the true pricing error
            randfundi = 1;
            for k = 1:size(PEfund_fundid,1)
                j = rnd_funddraw(randfundi); randfundi = randfundi + 1;   % draw a random fund                
                PMEtilde(k) = PMEpre(j);    %randn*1;
                r(k,numr,1) = r(k,numr,1) + PMEtilde(k);   % add the PME-tilde to the first cash flow          
            end

            % re-estimate fund PMEs on bootstrapped data
            f = zeros(size(cf,1), 2, size(cf,2));
            f(:,1,:) = PEfund_T;
            f(:,2,:) = log(Rm);                 
            PMEi = fundMCexpaff_hvar([0; -1],-1,f,r(:,numr,:),prc(:,numr),hi,1);
            %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
            % End bootstrap
            %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%    


            % vector of prices
            prc = zeros(size(r,1),size(r,2));
            % weights
            W = eye(numr);
            W(numr,numr) = 0.00000001;   %SDF parameters chosen to fit Rm and Rf perfectly, put (virtually) no weight on VC

            % set up the pricing factors
            numf = length(factors);
            f = zeros(size(cf,1),numf+1,size(cf,2)); 
            f(:,1,:) = PEfund_T; 
            for j = 1:numf
                eval(['f(:,j+1,:) = ' factors{j} ';'])
            end

            % estimate SDF
            initb = [0.9; -ones(numf,1)*2];%initb = zeros(numf+1,1);%[0; -2; -2]; %0.03; -1.6; -0.5
            aJ = [zeros(numr-1,1); 1];
            if (numf+1 > numr-1)
                fprintf('\nERROR: Too few assets, cannot estimate SDF\n')
                break
            end
            if (numf==1)%(numf+1 == numr-1)                            
                b = fsolve(@(b) GMMcritg(@fundMCexpaff,b,f,r(:,1:numr-1,:),prc(:,1:numr-1),eye(numr-1)), initb, options)  %fundMCexpaff takes log factors as inputs
            else%if (numf+1 < numr-1)
                b = fminsearch(@(b) sum(GMMcritg(@fundMCexpaff,b,f,r(:,1:numr-1,:),prc(:,1:numr-1),eye(numr-1)).^2), initb, options)
            end        

            % estimate GPME
            [~, Jfix, ~, bvar, ~, ~, pJfix, gt, gvar] = GMMcrit(@fundMCexpaff, b, f, r, prc, W, hi, @Smatdatenums, aJ); 

            bse     = sqrt(diag(bvar))
            J       = Jfix  
            pJ      = pJfix  
            GPMEi   = gt(:,numr);
            GPME    = mean(gt(:,numr))
            se      = sqrt(gvar)

            % Back out betas
            f = zeros(size(cf,1),numf+2,size(cf,2));
            f(:,1,:) = PEfund_T;
            eval(['f(:,2,:) = ' logRf_set{fm} ';'])
            for j = 1:numf
                eval(['f(:,j+2,:) = ' factors{j} ';'])
            end    

            % estimate factor covariance matrix, taking into account longer
            % horizons as well (possibly some mean reversion)
            vart = zeros(length(factors), length(factors), 10);
            for i = 1:15
                ind = (PEfund_T > i-1 & PEfund_T <= i); 
                S = []; for j = 1:length(factors), eval(['fj = ' factors{j} ';']); S(:,j) = fj(ind)./sqrt(PEfund_T(ind)); end
                vart(:,:,i) = cov(S);    % more accurate to scale each obs by its PEfund_T?
            end     

            if numf == 1 % one factor only
                initb = 0.1;  % reasonable beta
                [betalo, fvallo] = fsolve(@(b) GMMcritg_hvar(@fundMCexpaff_hvar, b, vart, f, r(:,numr,:), GPME, 1), initb, options)  %fundMCexpaff takes log factors as inputs

                b = betalo; 
                [gtlo, ~] = fundMCexpaff_hvar(b, vart, f, r(:,numr,:), GPME, 0, 1); 
                varlo = var(gtlo)    % compute variance of alpha for this beta estimate

                initb = 30;   % crazy beta
                [betahi, fvalhi] = fsolve(@(b) GMMcritg_hvar(@fundMCexpaff_hvar, b, vart, f, r(:,numr,:), GPME, 1), initb, options)  %fundMCexpaff takes log factors as inputs

                b = betahi; 
                [gthi, ~] = fundMCexpaff_hvar(b, vart, f, r(:,numr,:), GPME, 0, 1); 
                varhi = var(gthi)    % compute variance of alpha for this beta estimate

                % pick beta estimate at which variance of alpha is lowest
                if varlo < varhi    
                    beta_est = betalo
                    g_est = gtlo;
                else
                    beta_est = betahi
                    g_est = gthi;
                end

                minv = var(g_est) % report as a model-selection metric   

                % Apply only to r(:,numr,:) since benchmark portfolio requires 
                % asset-specific betas to price, and hence does not price rf, Rm with the PE beta
                aJ = 1; 
                [~, Jfix, ~, bvar, ~, ~, pJfix, gt] = GMMcrit_hvar(@fundMCexpaff_hvar, beta_est, vart, f, r(:,numr,:), prc(:,numr), 1, hi, @Smatdatenums, aJ); 

            elseif numf == 2    % more than 1 factor (NB: currently can only do no more than two factors)

                nbeta1 = 300; 

                beta1s = linspace(-5,5,nbeta1); % grid
                beta2s = zeros(nbeta1,1);         
                vars = zeros(nbeta1,1); 
                ms = zeros(nbeta1,1); 
                gs = zeros(nbeta1,size(r,1));             

                for j = 1:nbeta1 

                    initb = -10;  % low starting point for beta2   

                    [betalo, fvallo] = fsolve(@(b) GMMcritg_hvar(@fundMCexpaff_hvar, [beta1s(j); b], vart, f, r(:,numr,:), GPME, 1), initb, options);  %fundMCexpaff takes log factors as inputs

                    b = betalo; 
                    [gtlo, dg] = fundMCexpaff_hvar([beta1s(j); b], vart, f, r(:,numr,:), GPME, 0, 1); 
                    varlo = var(gtlo);    % compute variance of alpha for this beta estimate
                    mlo = mean(gtlo); 
                    varlo = varlo + 100000*mlo^2;

                    initb = 10;  % high starting point for beta2
                    [betahi, fvallo] = fsolve(@(b) GMMcritg_hvar(@fundMCexpaff_hvar, [beta1s(j); b], vart, f, r(:,numr,:), GPME, 1), initb, options);  %fundMCexpaff takes log factors as inputs

                    b = betahi; 
                    [gthi, dg] = fundMCexpaff_hvar([beta1s(j); b], vart, f, r(:,numr,:), GPME, 0, 1); 
                    varhi = var(gthi);    % compute variance of alpha for this beta estimate
                    mhi = mean(gthi); 
                    varhi = varhi + 100000*mhi^2; 

                    % pick beta estimate at which variance of alpha is lowest
                    if varlo < varhi    
                        beta2s(j) = betalo;
                        vars(j) = varlo; 
                        ms(j) = mlo; 
                        gs(j,:) = gtlo'; 
                    else
                        beta2s(j) = betahi;
                        vars(j) = varhi; 
                        ms(j) = mhi; 
                        gs(j,:) = gthi'; 
                    end

                end

                [minv,minind] = min(vars); disp(minv)
                beta_est = [beta1s(minind); beta2s(minind)]
                mg = mean(gs,2); 
                mg_est = mg(minind)

                if mg_est > 0.001
                    fprintf('\nERROR: Solution not found')
                    beta_est = [NaN; NaN];
                    gt = NaN(size(alphai_save,1),1);
                else
                    aJ = 1; 
                    [~, Jfix, ~, bvar, ~, ~, pJfix, gt] = GMMcrit_hvar(@fundMCexpaff_hvar, beta_est, vart, f, r(:,numr,:), prc(:,numr), 1, hi, @Smatdatenums, aJ);     
                end

            else
                disp('WARNING: Can only two factors or fewer'); 
            end % if numf == 1

            PMEi_save(:,fm,truebeta) = PMEi;
            alphai_save(:,fm,truebeta) = gt;
            GPMEi_save(:,fm,truebeta)  = GPMEi;

            PMEtildei_save(:,fm,truebeta)  = PMEtilde;  % true pricing error
            
            fundbeta_save(:,fm,truebeta)  = fundbeta;  % true beta
            
        end % for truebeta
    end %for fm               
                       
    

    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Output results
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    disp(['Sample #' num2str(samp)])
    disp(['# Funds = ' num2str(size(PEfund_CF,1))]);
       
    for fm = 1:size(fit_set,1) % cycle through the various factor models used to estimate GPME and alpha
        for truebeta = 1:6
    
            labels = label_set{fm};
            disp('---------------------------------------------');
            disp([labels{1} ', true beta = ' num2str(truebeta/2)]);
            disp('---------------------------------------------');
            s_fm = labels{2};

            PMEi = PMEi_save(:,fm,truebeta);
            GPMEi = GPMEi_save(:,fm,truebeta); 
            alphai = alphai_save(:,fm,truebeta);
            
            PMEtildei = PMEtildei_save(:,fm,truebeta);
        
            f = '%10.4f';
            disp(' ');     
            disp(['Mean true fund beta = ' num2str(mean(fundbeta_save(:,fm,truebeta)),f)])
            disp(' ');     
            disp('Raw metrics')
            disp('          alpha     GPME      PME      true alpha')
            disp(['Mean     ' num2str(mean(alphai),f) '   ' num2str(mean(GPMEi),f) '   ' num2str(mean(PMEi),f) '   ' num2str(mean(PMEtildei),f)]);
            disp(' ')
            disp(['Min      ' num2str(min(alphai),f) '   ' num2str(min(GPMEi),f) '   ' num2str(min(PMEi),f) '   ' num2str(min(PMEtildei),f)]);
            disp(['10th     ' num2str(prctile(alphai,10),f) '   ' num2str(prctile(GPMEi,10),f) '   ' num2str(prctile(PMEi,10),f) '   ' num2str(prctile(PMEtildei,10),f)]);
            disp(['25th     ' num2str(prctile(alphai,25),f) '   ' num2str(prctile(GPMEi,25),f) '   ' num2str(prctile(PMEi,25),f) '   ' num2str(prctile(PMEtildei,25),f)]);
            disp(['Median   ' num2str(median(alphai),f) '   ' num2str(median(GPMEi),f) '   ' num2str(median(PMEi),f) '   ' num2str(median(PMEtildei),f)]);
            disp(['75th     ' num2str(prctile(alphai,75),f) '   ' num2str(prctile(GPMEi,75),f) '   ' num2str(prctile(PMEi,75),f) '   ' num2str(prctile(PMEtildei,75),f)]);
            disp(['90th     ' num2str(prctile(alphai,90),f) '   ' num2str(prctile(GPMEi,90),f) '   ' num2str(prctile(PMEi,90),f) '   ' num2str(prctile(PMEtildei,90),f)]);
            disp(['Max      ' num2str(max(alphai),f) '   ' num2str(max(GPMEi),f) '   ' num2str(max(PMEi),f) '   ' num2str(max(PMEtildei),f)]);
            disp(' ')
            disp(['St.Dev.  ' num2str(std(alphai),f) '   ' num2str(std(GPMEi),f) '   ' num2str(std(PMEi),f) '   ' num2str(std(PMEtildei),f)]);
            disp(['Skewness ' num2str(skewness(alphai),f) '   ' num2str(skewness(GPMEi),f) '   ' num2str(skewness(PMEi),f) '   ' num2str(skewness(PMEtildei),f)]);
            disp(['Kurtosis ' num2str(kurtosis(alphai),f) '   ' num2str(kurtosis(GPMEi),f) '   ' num2str(kurtosis(PMEi),f) '   ' num2str(kurtosis(PMEtildei),f)]);  

            disp(' ')
            disp('Correlations:')
            correl = corrcoef(alphai, PMEtildei);
            disp(['Corr(alpha, true alpha) = ' num2str(correl(1,2))]);
            correl = corrcoef(GPMEi, PMEtildei);
            disp(['Corr(GPME, true alpha) = ' num2str(correl(1,2))]);
            correl = corrcoef(PMEi, PMEtildei);
            disp(['Corr(PME, true alpha) = ' num2str(correl(1,2))]);

            disp(' ')
            disp('RMSE:')
            disp(['alpha vs true alpha = ' num2str(sqrt(mean((alphai-PMEtildei).^2)),f)]);
            disp(['GPME vs true alpha  = ' num2str(sqrt(mean((GPMEi-PMEtildei).^2)),f)]);
            disp(['PME vs true alpha   = ' num2str(sqrt(mean((PMEi-PMEtildei).^2)),f)]);
        
            disp(' ')
            disp('Mean absolute deviation:')
            disp(['alpha vs true alpha = ' num2str(mean(abs(alphai-PMEtildei)),f)]);
            disp(['GPME vs true alpha  = ' num2str(mean(abs(GPMEi-PMEtildei)),f)]);
            disp(['PME vs true alpha   = ' num2str(mean(abs(PMEi-PMEtildei)),f)]);

            disp(' ')
            disp('Median absolute deviation:')
            disp(['alpha vs true alpha = ' num2str(median(abs(alphai-PMEtildei)),f)]);
            disp(['GPME vs true alpha  = ' num2str(median(abs(GPMEi-PMEtildei)),f)]);
            disp(['PME vs true alpha   = ' num2str(median(abs(PMEi-PMEtildei)),f)]);
                    
            disp(' ');
            disp('Metric - true alpha')
            disp('          alpha     GPME      PME')       
            disp(['Mean     ' num2str(mean(alphai-PMEtildei),f) '   ' num2str(mean(GPMEi-PMEtildei),f) '   ' num2str(mean(PMEi-PMEtildei),f)]);
            disp(' ')
            disp(['Min      ' num2str(min(alphai-PMEtildei),f) '   ' num2str(min(GPMEi-PMEtildei),f) '   ' num2str(min(PMEi-PMEtildei),f)]);
            disp(['10th     ' num2str(prctile(alphai-PMEtildei,10),f) '   ' num2str(prctile(GPMEi-PMEtildei,10),f) '   ' num2str(prctile(PMEi-PMEtildei,10),f)]);
            disp(['25th     ' num2str(prctile(alphai-PMEtildei,25),f) '   ' num2str(prctile(GPMEi-PMEtildei,25),f) '   ' num2str(prctile(PMEi-PMEtildei,25),f)]);
            disp(['Median   ' num2str(median(alphai-PMEtildei),f) '   ' num2str(median(GPMEi-PMEtildei),f) '   ' num2str(median(PMEi-PMEtildei),f)]);
            disp(['75th     ' num2str(prctile(alphai-PMEtildei,75),f) '   ' num2str(prctile(GPMEi-PMEtildei,75),f) '   ' num2str(prctile(PMEi-PMEtildei,75),f)]);
            disp(['90th     ' num2str(prctile(alphai-PMEtildei,90),f) '   ' num2str(prctile(GPMEi-PMEtildei,90),f) '   ' num2str(prctile(PMEi-PMEtildei,90),f)]);
            disp(['Max      ' num2str(max(alphai-PMEtildei),f) '   ' num2str(max(GPMEi-PMEtildei),f) '   ' num2str(max(PMEi-PMEtildei),f)]);
            disp(' ')
            disp(['St.Dev.  ' num2str(std(alphai-PMEtildei),f) '   ' num2str(std(GPMEi-PMEtildei),f) '   ' num2str(std(PMEi-PMEtildei),f)]);
            disp(['Skewness ' num2str(skewness(alphai-PMEtildei),f) '   ' num2str(skewness(GPMEi-PMEtildei),f) '   ' num2str(skewness(PMEi-PMEtildei),f)]);
            disp(['Kurtosis ' num2str(kurtosis(alphai-PMEtildei),f) '   ' num2str(kurtosis(GPMEi-PMEtildei),f) '   ' num2str(kurtosis(PMEi-PMEtildei),f)]);  
                      
            disp(' ');
            disp(' ');
            disp(' ');
            


        end % for truebeta
    
    end % for fm
    
end % for samp

toc



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Done, do clean up: delete the temporary data file and save the diary
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if ~debug && exist('temp.mat','file')
    delete('temp.mat');    % delete temporary file, if it exists
end

diary off   % this saves the screen output



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Error handling: ensure diary is saved and contains basic error description
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
catch err 
    disp(' ')
    disp(err.identifier)
    disp(err.message)
    disp(['Line ' num2str(err.stack(end).line)]);
    
    % clean up the temporary data file
    if ~debug && exist('temp.mat','file')
        delete('temp.mat');    % delete temporary file, if it exists
    end
    
    diary off       % this saves the screen output
    
    rethrow(err); 
end
